#-------------------------------------------------------------------------------
# Replication code for Survey 3

# Packages
library(tidyverse); library(rio); library(cowplot); library(texreg)

# Set directory or change path to saved data
# Lines that save figures are commented out

# Data
# Respondents who did not consent already removed
tu <- import("survey3.csv")

#-------------------------------------------------------------------------------
# Initial cleaning

# Variable names to lower
colnames(tu) <- tolower(colnames(tu))

# Create and recode variables
tu <- tu %>%
  mutate(minutes = as.numeric(duration)/60, 
         pass_manip = ifelse(str_detect(q126, "A tax rebate"), 1, 0),
         believe = ifelse(str_detect(q127, "unbeliev"), 0, 1)) %>%
  mutate(across(q1_2:duration, ~ifelse(. == "", NA, .))) %>%
  
  # Treatment manipulation
  mutate(manipulation = case_when(!is.na(q99_click_count)  ~ "Baseline",
                                  !is.na(q117_click_count)  ~ "Time Estimates",
                                  T ~ NA_character_),
         # DVs
         # Believe eligible
         eligible_char = coalesce(q2_2, q122),
         eligible = ifelse(eligible_char == "Yes", 1, 0),
         # Likely apply
         apply_char = coalesce(q4_1, q123_1),
         apply = case_when(apply_char == "Strongly disagree" ~ 1, 
                           apply_char == "Disagree" ~ 2, 
                           apply_char == "Somewhat disagree" ~ 3, 
                           apply_char == "Neither agree nor disagree" ~ 4,
                           apply_char == "Somewhat agree" ~ 5, 
                           apply_char == "Agree" ~ 6,
                           apply_char == "Strongly agree" ~ 7,
                           T ~ NA_real_),
         # Helpful for people like me
         helpful_char = coalesce(q4_2, q123_2),
         helpful = case_when(helpful_char == "Strongly disagree" ~ 1, 
                             helpful_char == "Disagree" ~ 2, 
                             helpful_char == "Somewhat disagree" ~ 3, 
                             helpful_char == "Neither agree nor disagree" ~ 4,
                             helpful_char == "Somewhat agree" ~ 5, 
                             helpful_char == "Agree" ~ 6,
                             helpful_char == "Strongly agree" ~ 7,
                             T ~ NA_real_),
         # Collecting information worth my time
         worth_char = coalesce(q4_3, q123_3),
         worth = case_when(worth_char == "Strongly disagree" ~ 1, 
                           worth_char == "Disagree" ~ 2, 
                           worth_char == "Somewhat disagree" ~ 3, 
                           worth_char == "Neither agree nor disagree" ~ 4,
                           worth_char == "Somewhat agree" ~ 5, 
                           worth_char == "Agree" ~ 6,
                           worth_char == "Strongly agree" ~ 7,
                           T ~ NA_real_),
         # Learning more about program
         learn_char = coalesce(q4_4, q123_4),
         learn = case_when(learn_char == "Strongly disagree" ~ 1, 
                           learn_char == "Disagree" ~ 2, 
                           learn_char == "Somewhat disagree" ~ 3, 
                           learn_char == "Neither agree nor disagree" ~ 4,
                           learn_char == "Somewhat agree" ~ 5, 
                           learn_char == "Agree" ~ 6,
                           learn_char == "Strongly agree" ~ 7,
                           T ~ NA_real_),
         # Dichotomize
         apply_di = ifelse(apply >= 5, 1, 0),
         helpful_di = ifelse(helpful >= 5, 1, 0),
         worth_di = ifelse(worth >= 5, 1, 0),
         learn_di = ifelse(learn >= 5, 1, 0),
         manip_agree_char = coalesce(q4_5, q123_5),
         manip_agree = ifelse(manip_agree_char == "Agree", 1, 0),
         # Enough support
         enough_char = coalesce(q5_2, q124), 
         # Check coding again
         not_enough = ifelse(str_detect(enough_char, "too little"), 1, 0),
         # Time estimations for each documentation
         income_est_time = coalesce(q119_1_text, q128_1_text),
         income_no_est_time = coalesce(q119, q128),
         id_est_time = coalesce(q120_1_text, q129_1_text),
         id_no_est_time = coalesce(q120, q129),
         residence_est_time = coalesce(q121_1_text, q130_1_text),
         residence_no_est_time = coalesce(q121, q130),
         # Race
         race = case_when(ethnicity == 1 ~ "White",
                          ethnicity == 2 ~ "Black",
                          ethnicity == 3 ~ "American Indian",
                          ethnicity %in% seq(4, 14, 1) ~ "Asian/Pacific Islander",
                          ethnicity == 15 ~ "Other",
                          T ~ "Other"),
         race = ifelse(hispanic == 1, race, "Hispanic"),
         female = ifelse(gender == 2, "Female", "Male"),
         education = ifelse(education < 1, NA, education),
         hhi = ifelse(hhi == 25, NA, hhi),
         party = q1_2
  )

# Add additional indicators 
tu <- tu %>%
  mutate(manipulation_time = case_when(manipulation == "Baseline" ~ "Baseline",
                                       !is.na(q119_1_text) & 
                                         !is.na(q120_1_text) & 
                                         !is.na(q121_1_text) ~ "All Documents",
                                       T ~ "Missing Documents"))
table(tu$manipulation, tu$manipulation_time)
#-------------------------------------------------------------------------------
# Summary information 

# Timing
summary(tu$minutes)
median(tu$minutes)
table(tu$minutes>30)[2]
table(tu$minutes>15)[2]
table(tu$minutes<3)[2]

# Indicator for fewer than 3 minutes
tu <- tu %>%
  mutate(exclude_time = ifelse(minutes < 3, 1, 0))

# N per treatment
table(tu$manipulation)

# Manipulation check
table(tu$manipulation, tu$pass_manip)
sum(table(tu$manipulation, tu$pass_manip)[,2])/sum(table(tu$manipulation, tu$pass_manip))
table(tu$manipulation, tu$manip_agree)
sum(table(tu$manipulation, tu$manip_agree)[,2])/sum(table(tu$manipulation, tu$manip_agree))

# Believability
table(tu$believe)
prop.table(table(tu$manipulation, tu$believe), 1)

#------------------
# Means for each DV
tu %>%
  select(eligible, apply, helpful, worth, learn, not_enough) %>%
  summary()
tu %>%
  select(eligible, apply, helpful, worth, learn, not_enough) %>%
  summarize(across(everything(), ~ sd(.x, na.rm = TRUE)))

#-------------------------------------------------------------------------------
# Distributions

# Reshape to long format
long_tu <- tu %>%
  select(apply, helpful, worth, learn) %>%
  pivot_longer(cols = everything(), names_to = "question", values_to = "response") %>%
  filter(!is.na(response))

# Add labels
long_tu <- long_tu %>%
  mutate(
    question = recode(question,
                      apply = "Apply",
                      helpful = "Helpful for Me",
                      worth = "Worth Time",
                      learn = "Learn More"),
    response = factor(response, levels = 1:7,
                      labels = c("Strongly Disagree", "Disagree", "Somewhat Disagree",
                                 "Neither", "Somewhat Agree", "Agree", "Strongly Agree"))
  )

# Create the plot
ggplot(long_tu, aes(x = response)) +
  geom_bar(position = "dodge") +
  facet_wrap(~ question, ncol = 1) +
  labs(x = "", y = "Number of Respondents") +
  theme_bw(base_size = 12) +
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 45, hjust = 1))
# ggsave"distribution_s3.pdf", width = 10, height = 8)

# Calculate percent in each response category for each question
percent_by_category <- long_tu %>%
  group_by(question, response) %>%
  summarise(n = n(), .groups = "drop") %>%
  group_by(question) %>%
  mutate(pct = round(100 * n / sum(n), 1))  

#-------------------------------------------------------------------------------
# Difference of means

# apply, helpful, worth, learn

# Function to grab the difference of means between two variables
dif.of.means.fun = function(var1, var2){
  temp <- t.test(var1, var2)
  p <- temp$p.value
  mean1 <- temp$estimate[1]
  mean2 <- temp$estimate[2]
  lower <- temp$conf.int[1]
  upper <- temp$conf.int[2]
  temp2 <- t.test(var1, var2,conf.level = 0.9)
  lower90 <- temp2$conf.int[1]
  upper90 <- temp2$conf.int[2]
  difference <- mean1 - mean2 
  out <- data.frame("mean1" = mean1,
                    "mean2"= mean2,
                    "difference" = difference,
                    "lower95" = lower,
                    "upper95" = upper,
                    "lower90" = lower90,
                    "upper90" = upper90,
                    "p" = p)
  return(out)
}

# Sanity check
dif.of.means.fun(tu$apply[tu$manipulation == "Time Estimates"],
                 tu$apply[tu$manipulation == "Baseline"])

# All treatments and DVs
dif.of.means.df <- bind_rows(
  # apply
  dif.of.means.fun(tu$apply[tu$manipulation == "Time Estimates"],
                   tu$apply[tu$manipulation == "Baseline"]),
  dif.of.means.fun(tu$apply[tu$manipulation_time == "All Documents"],
                   tu$apply[tu$manipulation_time == "Baseline"]),
  dif.of.means.fun(tu$apply[tu$manipulation_time == "Missing Documents"],
                   tu$apply[tu$manipulation_time == "Baseline"]),
  # helpful
  dif.of.means.fun(tu$helpful[tu$manipulation == "Time Estimates"],
                   tu$helpful[tu$manipulation == "Baseline"]),
  dif.of.means.fun(tu$helpful[tu$manipulation_time == "All Documents"],
                   tu$helpful[tu$manipulation_time == "Baseline"]),
  dif.of.means.fun(tu$helpful[tu$manipulation_time == "Missing Documents"],
                   tu$helpful[tu$manipulation_time == "Baseline"]),
  # worth
  dif.of.means.fun(tu$worth[tu$manipulation == "Time Estimates"],
                   tu$worth[tu$manipulation == "Baseline"]),
  dif.of.means.fun(tu$worth[tu$manipulation_time == "All Documents"],
                   tu$worth[tu$manipulation_time == "Baseline"]),
  dif.of.means.fun(tu$worth[tu$manipulation_time == "Missing Documents"],
                   tu$worth[tu$manipulation_time == "Baseline"]),
  # learn
  dif.of.means.fun(tu$learn[tu$manipulation == "Time Estimates"],
                   tu$learn[tu$manipulation == "Baseline"]),
  dif.of.means.fun(tu$learn[tu$manipulation_time == "All Documents"],
                   tu$learn[tu$manipulation_time == "Baseline"]),
  dif.of.means.fun(tu$learn[tu$manipulation_time == "Missing Documents"],
                   tu$learn[tu$manipulation_time == "Baseline"]))
# Outcome
dif.of.means.df$outcome <- c(rep(1, 3), rep(2, 3), rep(3, 3), rep(4, 3))
dif.of.means.df$outcome <- factor(dif.of.means.df$outcome,
                                  levels = 1:4, 
                                  labels = c("Apply", "Helpful for Me", "Worth Time", "Learn More"))

# Comparison
dif.of.means.df$compare <- c(rep(seq(1, 3), 4))
dif.of.means.df$compare <- factor(dif.of.means.df$compare,
                                  levels = 1:3, 
                                  labels = c("Time Estimates - \nBaseline",
                                             "All Documents - \nBaseline",
                                             "Missing Documents - \nBaseline"))

# Plot
dif.of.mean.plot <- ggplot(dif.of.means.df, 
                           aes(x = fct_rev(compare), y = difference)) +
  geom_hline(yintercept = 0, color = "black", linetype = "dashed")+
  geom_errorbar(width = 0, linewidth = 0.75, color = "darkgrey",
                aes(ymin = lower95, ymax = upper95)) +
  geom_errorbar(width=0, linewidth=1.5, color="black",
                aes(ymin=lower90, ymax=upper90)) +
  geom_point(size = 5, fill = "black") +
  theme_bw(base_size =  14)+
  facet_wrap(~outcome, ncol = 4) + 
  coord_flip() +
  labs(y = "Difference in Means", x = ""); dif.of.mean.plot

#-------------------------------------------------------------------------------
# Difference of proportions

# eligible, not_enough

# Function to grab difference of proportions
prop.test.values <- function(generated.table, rounding.number = 3){
  n <- sum(generated.table)
  temp <- prop.test(generated.table)
  prop.1 <- temp$estimate[1]
  prop.2 <- temp$estimate[2]
  dif <- prop.1 - prop.2
  lower <- temp$conf.int[1]
  upper <- temp$conf.int[2]
  p <- temp$p.value
  temp2 <- prop.test(generated.table,conf.level = 0.9)
  lower90 <- temp2$conf.int[1]
  upper90 <- temp2$conf.int[2]
  out <- data.frame("n" = n,
                    "prop1" = prop.1,
                    "prop2" = prop.2,
                    "difference" = dif,
                    "lower95" = lower,
                    "upper95" = upper,
                    "lower90" = lower90,
                    "upper90" = upper90,
                    "p" = p)
  return(out)
}

# Sanity check
prop.test.values(table(tu$manipulation, tu$eligible)[c(1,2), ])

# Save as table
tab.elig <- table(tu$manipulation, tu$eligible)
tab.support <- table(tu$manipulation, tu$not_enough)
tab.elig2 <- table(tu$manipulation_time, tu$eligible)
tab.support2 <- table(tu$manipulation_time, tu$not_enough)

# For each treatment group
diff.prop <- bind_rows(prop.test.values(tab.elig[c(1, 2),]),
                       prop.test.values(tab.elig2[c(1, 2),]),
                       prop.test.values(tab.elig2[c(1, 3),]),
                       prop.test.values(tab.support[c(1, 2),]),
                       prop.test.values(tab.support2[c(1, 2),]),
                       prop.test.values(tab.support2[c(1, 3),]))

# Add in comparison text
diff.prop$compare <- "Time Estimates - \nBaseline"
diff.prop$condition <- c("Eligible for Program", "Not Enough Support")

# Add in comparison text
diff.prop$compare <- c(rep(c("Time Estimates - \nBaseline",
                             "All Documents - \nBaseline",
                             "Missing Documents - \nBaseline"), 2))
diff.prop$compare <- fct_relevel(diff.prop$compare,
                                 "Time Estimates - \nBaseline",
                                 "All Documents - \nBaseline",
                                 "Missing Documents - \nBaseline")
diff.prop$condition <- c(rep("Eligible for Program", 3), rep("Not Enough Support", 3))

# Graph
diff.prop.graph <- ggplot(diff.prop, 
                          aes(x = compare, y = difference)) +
  geom_hline(yintercept = 0, color = "black", linetype = "dashed")+
  geom_errorbar(width = 0, linewidth = 0.75, color = "darkgrey",
                aes(ymin = lower95, ymax = upper95)) +
  geom_errorbar(width=0,linewidth=1.5,color="black",
                aes(ymin=lower90, ymax=upper90)) +
  geom_point(size = 5, fill = "black") +
  facet_wrap(~condition) +
  theme_bw(base_size = 14) +
  labs(y = "Difference in Proportion", x = ""); diff.prop.graph

# Combine graphs
plot_grid(diff.prop.graph, dif.of.mean.plot, nrow = 2,
          align = "hv", axis = "l", labels = c("a.", "b."),
          rel_heights = c(1, 1.2))
# ggsave"figure8.png", width = 12, height = 7, dpi = 300)

#-------------------------------------------------------------------------------
# Time estimation
# Treatment group and post-outcome measure

# Calculate the median and mean for each group
stats <- tu %>%
  filter(between(income_est_time, 0, 120),
         between(id_est_time, 0, 120),
         between(residence_est_time, 0, 120)) %>%
  group_by(manipulation) %>%
  summarize(
    median_income = median(income_est_time, na.rm = TRUE),
    mean_income = mean(income_est_time, na.rm = TRUE),
    sd_income = sd(income_est_time, na.rm   = TRUE),
    median_id = median(id_est_time, na.rm = TRUE),
    mean_id = mean(id_est_time, na.rm = TRUE),
    sd_id = sd(id_est_time, na.rm   = TRUE),
    median_residence = median(residence_est_time, na.rm = TRUE),
    mean_residence = mean(residence_est_time, na.rm = TRUE),
    sd_residence = sd(residence_est_time, na.rm   = TRUE)
  ) %>%
  mutate(manipulation = ifelse(manipulation == "Baseline", "Post-Outcome Estimates", "Treatment Group Estimates"))

# Make long
stats <- stats %>%
  pivot_longer(
    cols = -manipulation,
    names_to = "stat_doc",
    values_to = "value"
  ) %>%
  separate(stat_doc, into = c("stat_type", "document_type"), sep = "_") %>%
  mutate(
    document_type = recode(document_type,
                           "income" = "Proof of Income",
                           "id" = "Proof of Identity",
                           "residence" = "Proof of Residence"),
    stat_type = recode(stat_type,
                       "mean" = "Mean",
                       "median" = "Median")
  )

# Convert to long for histograms 
tu.long <- tu %>%
  filter(between(income_est_time, 0, 120),
         between(id_est_time, 0, 120),
         between(residence_est_time, 0, 120)) %>%
  mutate(manipulation = ifelse(manipulation == "Baseline", "Post-Outcome Estimates", "Treatment Group Estimates")) %>%
  pivot_longer(
    cols = c(income_est_time, id_est_time, residence_est_time),
    names_to = "document_type",
    values_to = "time_minutes"
  ) %>%
  mutate(
    document_type = recode(document_type,
                           income_est_time = "Proof of Income",
                           id_est_time = "Proof of Identity",
                           residence_est_time = "Proof of Residence")
  )

# For plot
median_lines <- stats %>%
  filter(stat_type == "Median")

# For text
labels_data <- stats %>%
  pivot_wider(names_from = stat_type, values_from = value)

# Plot the histogram with a vertical line for the median and text labels for both median and mean
ggplot(tu.long, aes(x = time_minutes)) +
  geom_histogram(binwidth = 5, position = "identity", alpha = 0.5) +
  facet_grid(document_type ~ manipulation) +
  # Median line only
  geom_vline(data = median_lines, aes(xintercept = value), color = "black", linetype = "dashed", linewidth = 1) +
  geom_text(
    data = labels_data, 
    aes(x = Median, y = Inf, label = paste("Median:", round(Median, 1))), 
    vjust = 3, hjust = -.5, color = "black", size = 4
  ) +
  geom_text(
    data = labels_data, 
    aes(x = Median, y = Inf, label = paste("Mean:", round(Mean, 1))), 
    vjust = 5, hjust = -.5, color = "darkblue", size = 4
  ) +
  labs(
    x = "Estimated Time to Locate Documentation (minutes)",
    y = "") +
  theme_bw(base_size = 14)
# ggsave"figure7.png", width = 9, height = 7, dpi = 300)

#-------------------------------------------------------------------------------
# Robustness checks
#-------------------------------------------------------------------------------

#---------------------------------
# Dichotomous DVs 
# apply, helpful, worth, learn

# Sanity check
prop.test.values(table(tu$manipulation, tu$apply_di)[c(1,2), ])

# Save as table
tab.apply <- table(tu$manipulation, tu$apply_di)
tab.helpful <- table(tu$manipulation, tu$helpful_di)
tab.worth <- table(tu$manipulation, tu$worth_di)
tab.learn <- table(tu$manipulation, tu$learn_di)
tab.apply2 <- table(tu$manipulation_time, tu$apply_di)
tab.helpful2 <- table(tu$manipulation_time, tu$helpful_di)
tab.worth2 <- table(tu$manipulation_time, tu$worth_di)
tab.learn2 <- table(tu$manipulation_time, tu$learn_di)

# For each treatment group
diff.prop <- bind_rows(prop.test.values(tab.apply[c(1, 2),]),
                       prop.test.values(tab.apply2[c(1, 2),]),
                       prop.test.values(tab.apply2[c(1, 3),]),
                       prop.test.values(tab.helpful[c(1, 2),]),
                       prop.test.values(tab.helpful2[c(1, 2),]),
                       prop.test.values(tab.helpful2[c(1, 3),]),
                       prop.test.values(tab.worth[c(1, 2),]),
                       prop.test.values(tab.worth2[c(1, 2),]),
                       prop.test.values(tab.worth2[c(1, 2),]),
                       prop.test.values(tab.learn[c(1, 2),]),
                       prop.test.values(tab.learn2[c(1, 2),]),
                       prop.test.values(tab.learn2[c(1, 3),]))

# Add in comparison text
diff.prop$compare <- c(rep(c("Time Estimates - \nBaseline",
                             "All Documents - \nBaseline",
                             "Missing Documents - \nBaseline"), 4))
diff.prop$compare <- fct_relevel(diff.prop$compare,
                                 "Time Estimates - \nBaseline",
                                 "All Documents - \nBaseline",
                                 "Missing Documents - \nBaseline")
diff.prop$condition <- c(rep("Apply", 3), rep("Helpful for Me", 3), 
                         rep("Worth Time", 3), rep("Learn More", 3))

# Graph
diff.prop.graph <- ggplot(diff.prop, 
                          aes(x = compare, y = difference)) +
  geom_hline(yintercept = 0, color = "black", linetype = "dashed")+
  geom_errorbar(width = 0, linewidth = 0.75, color = "darkgrey",
                aes(ymin = lower95, ymax = upper95)) +
  geom_errorbar(width=0,linewidth=1.5,color="black",
                aes(ymin=lower90, ymax=upper90)) +
  geom_point(size = 5, fill = "black") +
  facet_wrap(~condition, ncol = 2) +
  theme_bw(base_size = 14) +
  labs(y = "Difference in Proportion", x = ""); diff.prop.graph
# ggsave"di_dv_s3.pdf", width = 14, height = 7)

# See where differences are 
# Convert to long 
tu_long <- tu %>%                                     # original wide data
  pivot_longer(
    cols      = c(apply, helpful, worth, learn),      # the four 7-pt items
    names_to  = "question",                           # which item
    values_to = "response"                            # 1–7 score
  ) %>%
  mutate(                                             
    question = recode(question,
                      apply   = "Apply",
                      helpful = "Helpful for Me",
                      worth   = "Time",
                      learn   = "Learn More"),
    response = factor(response, levels = 1:7)         # keep order
  )

cat_diff <- tu_long %>%                              
  group_by(question, manipulation_time, response) %>%           # count by cell & category
  summarise(n = n(), .groups = "drop") %>%           
  group_by(question, manipulation_time) %>%                     # within-cell percent
  filter(!is.na(response)) %>%
  mutate(pct = n / sum(n)) %>%
  ungroup() %>%
  select(question, manipulation_time, response, pct) %>%
  pivot_wider(                                      
    names_from  = manipulation_time,                 
    values_from = pct
  ) %>%
  mutate(                                             # pair-wise differences
    diff_all  = `All Documents`  - Baseline,          # (all-docs minus baseline)
    diff_miss = `Missing Documents` - Baseline        # (missing-docs minus baseline)
  ) %>%
  arrange(question, response)

print(cat_diff)

library(knitr)
library(kableExtra)

make_latex <- function(df, q){
  df %>%
    filter(question == q) %>%
    select(response, Baseline, Time_All, Time_Miss,
           diff_all, diff_miss) %>%
    kable(format = "latex", booktabs = TRUE, digits = 3,
          col.names = c("Response", "Baseline", "All Docs",
                        "Missing Docs", "Δ All–Base", "Δ Miss–Base"),
          caption = paste("Category–specific shares for", q)) %>%
    kable_styling(full_width = FALSE)
}

knitr::kable(cat_diff,
             digits = 3,
             format = "latex", 
             booktabs = T,
             col.names = c("Question", "Response","All Docs","Baseline","Missing Docs",
                           "All–Base","Miss–Base"),
             caption = "Category–specific shares and differences") 


#---------------------------------
# Dropping inattentive respondents
# 1) Failed manipulation check
# 2) Took fewer than 3 minutes to complete survey
tu.ia <- tu %>%
  filter(pass_manip == 1, 
         minutes > 3)

# All treatments and DVs
dif.of.means.df <- bind_rows(
  # apply
  dif.of.means.fun(tu.ia$apply[tu.ia$manipulation == "Time Estimates"],
                   tu.ia$apply[tu.ia$manipulation == "Baseline"]),
  dif.of.means.fun(tu.ia$apply[tu.ia$manipulation_time == "All Documents"],
                   tu.ia$apply[tu.ia$manipulation_time == "Baseline"]),
  dif.of.means.fun(tu.ia$apply[tu.ia$manipulation_time == "Missing Documents"],
                   tu.ia$apply[tu.ia$manipulation_time == "Baseline"]),
  # helpful
  dif.of.means.fun(tu.ia$helpful[tu.ia$manipulation == "Time Estimates"],
                   tu.ia$helpful[tu.ia$manipulation == "Baseline"]),
  dif.of.means.fun(tu.ia$helpful[tu.ia$manipulation_time == "All Documents"],
                   tu.ia$helpful[tu.ia$manipulation_time == "Baseline"]),
  dif.of.means.fun(tu.ia$helpful[tu.ia$manipulation_time == "Missing Documents"],
                   tu.ia$helpful[tu.ia$manipulation_time == "Baseline"]),
  # worth
  dif.of.means.fun(tu.ia$worth[tu.ia$manipulation == "Time Estimates"],
                   tu.ia$worth[tu.ia$manipulation == "Baseline"]),
  dif.of.means.fun(tu.ia$worth[tu.ia$manipulation_time == "All Documents"],
                   tu.ia$worth[tu.ia$manipulation_time == "Baseline"]),
  dif.of.means.fun(tu.ia$worth[tu.ia$manipulation_time == "Missing Documents"],
                   tu.ia$worth[tu.ia$manipulation_time == "Baseline"]),
  # learn
  dif.of.means.fun(tu.ia$learn[tu.ia$manipulation == "Time Estimates"],
                   tu.ia$learn[tu.ia$manipulation == "Baseline"]),
  dif.of.means.fun(tu.ia$learn[tu.ia$manipulation_time == "All Documents"],
                   tu.ia$learn[tu.ia$manipulation_time == "Baseline"]),
  dif.of.means.fun(tu.ia$learn[tu.ia$manipulation_time == "Missing Documents"],
                   tu.ia$learn[tu.ia$manipulation_time == "Baseline"]))
# Outcome
dif.of.means.df$outcome <- c(rep(1, 3), rep(2, 3), rep(3, 3), rep(4, 3))
dif.of.means.df$outcome <- factor(dif.of.means.df$outcome,
                                  levels = 1:4, 
                                  labels = c("Apply", "Helpful for Me", "Worth Time", "Learn More"))

# Comparison
dif.of.means.df$compare <- c(rep(seq(1, 3), 4))
dif.of.means.df$compare <- factor(dif.of.means.df$compare,
                                  levels = 1:3, 
                                  labels = c("Time Estimates - \nBaseline",
                                             "All Documents - \nBaseline",
                                             "Missing Documents - \nBaseline"))

# Plot
dif.of.mean.plot <- ggplot(dif.of.means.df, 
                           aes(x = fct_rev(compare), y = difference)) +
  geom_hline(yintercept = 0, color = "black", linetype = "dashed")+
  geom_errorbar(width = 0, linewidth = 0.75, color = "darkgrey",
                aes(ymin = lower95, ymax = upper95)) +
  geom_errorbar(width=0,linewidth=1.5,color="black",
                aes(ymin=lower90, ymax=upper90)) +
  geom_point(size = 5, fill = "black") +
  theme_bw(base_size =  14)+
  facet_wrap(~outcome, ncol = 4) + 
  coord_flip() +
  labs(y = "Difference in Means", x = ""); dif.of.mean.plot

# Sanity check
prop.test.values(table(tu.ia$manipulation, tu.ia$eligible)[c(1,2), ])

# Save as table
tab.elig <- table(tu.ia$manipulation, tu.ia$eligible)
tab.support <- table(tu.ia$manipulation, tu.ia$not_enough)
tab.elig2 <- table(tu.ia$manipulation_time, tu.ia$eligible)
tab.support2 <- table(tu.ia$manipulation_time, tu.ia$not_enough)

# For each treatment group
diff.prop <- bind_rows(prop.test.values(tab.elig[c(1, 2),]),
                       prop.test.values(tab.elig2[c(1, 2),]),
                       prop.test.values(tab.elig2[c(1, 3),]),
                       prop.test.values(tab.support[c(1, 2),]),
                       prop.test.values(tab.support2[c(1, 2),]),
                       prop.test.values(tab.support2[c(1, 3),]))

# Add in comparison text
diff.prop$compare <- "Time Estimates - \nBaseline"
diff.prop$condition <- c("Eligible for Program", "Not Enough Support")

# Add in comparison text
diff.prop$compare <- c(rep(c("Time Estimates - \nBaseline",
                             "All Documents - \nBaseline",
                             "Missing Documents - \nBaseline"), 2))
diff.prop$compare <- fct_relevel(diff.prop$compare,
                                 "Time Estimates - \nBaseline",
                                 "All Documents - \nBaseline",
                                 "Missing Documents - \nBaseline")
diff.prop$condition <- c(rep("Eligible for Program", 3), rep("Not Enough Support", 3))


# Graph
diff.prop.graph <- ggplot(diff.prop, 
                          aes(x = compare, y = difference)) +
  geom_hline(yintercept = 0, color = "black", linetype = "dashed")+
  geom_errorbar(width = 0, linewidth = 0.75, color = "darkgrey",
                aes(ymin = lower95, ymax = upper95)) +
  geom_errorbar(width=0,linewidth=1.5,color="black",
                aes(ymin=lower90, ymax=upper90)) +
  geom_point(size = 5, fill = "black") +
  facet_wrap(~condition) +
  theme_bw(base_size = 14) +
  labs(y = "Difference in Proportion", x = ""); diff.prop.graph

# Combine graphs
plot_grid(diff.prop.graph, dif.of.mean.plot, nrow = 2,
          align = "hv", axis = "l", labels = c("a.", "b."),
          rel_heights = c(1, 1.2))
# ggsave"composite_s3_attentive.pdf", width = 12, height = 7)

#-------------------------------------------------------------------------------
# Moderating variables

# Low income indicator
# 5 is up to 34,999
tu <- tu %>%
  mutate(low_income = ifelse(hhi <= 5, 1, 0))
table(tu$low_income)

reg.elig.apply <- lm(apply ~ manipulation*eligible, data = tu); summary(reg.elig.apply)
reg.elig.worth <- lm(worth ~ manipulation*eligible, data = tu); summary(reg.elig.worth)
reg.elig.helpful <- lm(helpful ~ manipulation*eligible, data = tu); summary(reg.elig.helpful)
reg.elig.learn <- lm(learn ~ manipulation*eligible, data = tu); summary(reg.elig.learn)

texreg(
  l = list(reg.elig.apply, reg.elig.worth, reg.elig.helpful, reg.elig.learn),
  custom.model.names = c("Apply", "Helpful", "Worth Time", "Learn More"),
  stars = c(0.01, 0.05),
  digits = 2,
  booktabs = TRUE,
  use.packages = FALSE,
  caption = "Treatment moderated by perceived eligibility",
  label = "tab:mode_elig_s3"
)

reg.sup.apply <- lm(apply ~ manipulation*not_enough, data = tu); summary(reg.sup.apply)
reg.sup.worth <- lm(worth ~ manipulation*not_enough, data = tu); summary(reg.sup.worth)
reg.sup.helpful <- lm(helpful ~ manipulation*not_enough, data = tu); summary(reg.sup.helpful)
reg.sup.learn <- lm(learn ~ manipulation*not_enough, data = tu); summary(reg.sup.learn)

texreg(
  l = list(reg.sup.apply, reg.sup.worth, reg.sup.helpful, reg.sup.learn),
  custom.model.names = c("Apply", "Helpful", "Worth Time", "Learn More"),
  stars = c(0.01, 0.05),
  digits = 2,
  booktabs = TRUE,
  use.packages = FALSE,
  caption = "Treatment moderated by support perceived as not enough",
  label = "tab:mode_sup_s3"
)

reg.inc.apply <- lm(apply ~ manipulation*low_income, data = tu); summary(reg.inc.apply)
reg.inc.worth <- lm(worth ~ manipulation*low_income, data = tu); summary(reg.inc.worth)
reg.inc.helpful <- lm(helpful ~ manipulation*low_income, data = tu); summary(reg.inc.helpful)
reg.inc.learn <- lm(learn ~ manipulation*low_income, data = tu); summary(reg.inc.learn)

texreg(
  l = list(reg.inc.apply, reg.inc.worth, reg.inc.helpful, reg.inc.learn),
  custom.model.names = c("Apply", "Helpful", "Worth Time", "Learn More"),
  stars = c(0.01, 0.05),
  digits = 2,
  booktabs = TRUE,
  use.packages = FALSE,
  caption = "Treatment moderated by low-income ($< 35k$)",
  label = "tab:mode_inc_s3"
)

reg.elig.apply2 <- lm(apply ~ manipulation_time*eligible, data = tu); summary(reg.elig.apply)
reg.elig.worth2 <- lm(worth ~ manipulation_time*eligible, data = tu); summary(reg.elig.worth)
reg.elig.helpful2 <- lm(helpful ~ manipulation_time*eligible, data = tu); summary(reg.elig.helpful)
reg.elig.learn2 <- lm(learn ~ manipulation_time*eligible, data = tu); summary(reg.elig.learn)

texreg(
  l = list(reg.elig.apply2, reg.elig.worth2, reg.elig.helpful2, reg.elig.learn2),
  custom.model.names = c("Apply", "Helpful", "Worth Time", "Learn More"),
  stars = c(0.01, 0.05),
  digits = 2,
  booktabs = TRUE,
  use.packages = FALSE,
  caption = "Effects by Documentation Availability: Treatment moderated by perceived eligibility",
  label = "tab:mode_elig2_s3"
)

reg.sup.apply2 <- lm(apply ~ manipulation_time*not_enough, data = tu); summary(reg.sup.apply2)
reg.sup.worth2 <- lm(worth ~ manipulation_time*not_enough, data = tu); summary(reg.sup.worth2)
reg.sup.helpful2 <- lm(helpful ~ manipulation_time*not_enough, data = tu); summary(reg.sup.helpful2)
reg.sup.learn2 <- lm(learn ~ manipulation_time*not_enough, data = tu); summary(reg.sup.learn2)

texreg(
  l = list(reg.sup.apply2, reg.sup.worth2, reg.sup.helpful2, reg.sup.learn2),
  custom.model.names = c("Apply", "Helpful", "Worth Time", "Learn More"),
  stars = c(0.01, 0.05),
  digits = 2,
  booktabs = TRUE,
  use.packages = FALSE,
  caption = "Effects by Documentation Availability: Treatment moderated by support perceived as not enough",
  label = "tab:mode_sup2_s3"
)

reg.inc.apply2 <- lm(apply ~ manipulation_time*low_income, data = tu); summary(reg.inc.apply2)
reg.inc.worth2 <- lm(worth ~ manipulation_time*low_income, data = tu); summary(reg.inc.worth2)
reg.inc.helpful2 <- lm(helpful ~ manipulation_time*low_income, data = tu); summary(reg.inc.helpful2)
reg.inc.learn2 <- lm(learn ~ manipulation_time*low_income, data = tu); summary(reg.inc.learn2)

texreg(
  l = list(reg.inc.apply2, reg.inc.worth2, reg.inc.helpful2, reg.inc.learn2),
  custom.model.names = c("Apply", "Helpful", "Worth Time", "Learn More"),
  stars = c(0.01, 0.05),
  digits = 2,
  booktabs = TRUE,
  use.packages = FALSE,
  caption = "Effects by Documentation Availability: Treatment moderated by low-income ($< 35k$)",
  label = "tab:mode_inc2_s3"
)

#-------------------------------------------------------------------------------
# Balance checks

tu.balance <- tu %>%
  mutate(
    baseline = ifelse(manipulation == "Baseline", 1, 0),
    time_estimates = ifelse(manipulation == "Time Estimates", 1, 0),
    all_documents = ifelse(manipulation_time == "All Documents", 1, 0),
    missing_documents = ifelse(manipulation_time == "Missing Documents", 1, 0)
  )


reg.balance1 = lm(baseline ~ factor(female) +
                    factor(race) +
                    as.numeric(age) + factor(party) + 
                    factor(region) +
                    as.numeric(hhi) + as.numeric(education), 
                  data = tu.balance)
reg.balance2 = lm(time_estimates ~ factor(female) +
                    factor(race) +
                    as.numeric(age) + factor(party) + 
                    factor(region) +
                    as.numeric(hhi) + as.numeric(education), 
                  data = tu.balance)
reg.balance3 = lm(all_documents ~ factor(female) +
                    factor(race) +
                    as.numeric(age) + factor(party) + 
                    factor(region) +
                    as.numeric(hhi) + as.numeric(education), 
                  data = tu.balance)
reg.balance4 = lm(missing_documents ~ factor(female) +
                    factor(race) +
                    as.numeric(age) + factor(party) + 
                    factor(region) +
                    as.numeric(hhi) + as.numeric(education), 
                  data = tu.balance)

texreg(list(reg.balance1, reg.balance2, reg.balance3, reg.balance4))

#-------------------------------------------------------------------------------
# ANOVA and Regression

# DVs
dvs <- c("eligible", "apply", "helpful", "worth", "learn", "not_enough")

# Run ANOVAs for each DV and store results in a list
anova_results <- lapply(dvs, function(dv) {
  formula <- as.formula(paste(dv, "~ manipulation"))
  aov(formula, data = tu)
})

lapply(anova_results, summary)

# Separated
anova_results <- lapply(dvs, function(dv) {
  formula <- as.formula(paste(dv, "~ manipulation_time"))
  aov(formula, data = tu)
})

lapply(anova_results, summary)

# Run regressions for each dependent variable and store results in a list
regression_results <- lapply(dvs, function(dv) {
  formula <- as.formula(paste(dv, "~ all_documents + missing_documents + 
                    factor(female) + factor(race) +
                    as.numeric(age) + factor(party) + 
                    factor(region) +
                    as.numeric(hhi) + as.numeric(education)"))
  lm(formula, data = tu.balance)
})

r.results <- lapply(regression_results, summary)
texreg(r.results)

#-------------------------------------------------------------------------------
# Summary statistics
tu.balance %>%
  select(baseline, all_documents, missing_documents, 
         eligible, apply, helpful, worth, learn, not_enough,
         female, race, age, party, region, hhi, education, 
         pass_manip, minutes) %>%
  stargazer::stargazer(type = "latex", summary = T, digits = 2)

